Imputation
# Evaluate imputation.R in knitr's global chunk environment.
# NOTE(review): presumably this defines data, miss_data, the impt_*_data
# lists and create_compare_data() used below -- confirm in imputation.R.
source(file = "imputation.R", local = knitr::knit_global())
## reg age sex hgt wgt
## 1012 983 987 1005 1030


MICE: Weight
MICE: compare the imputed datasets with the original dataset
# Comparison frame for wgt under MICE, built with sp_impt = "method".
df_mice_wgt <- create_compare_data(
  data, miss_data, impt_mice_data,
  col = "wgt", method = "mice", sp_impt = "method"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_mice_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_mice_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_mice_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

MICE:compare split with Sex
# Rebuild the MICE wgt comparison frame with sp_impt = "sex"; this
# overwrites the df_mice_wgt created for the method comparison.
df_mice_wgt <- create_compare_data(
  data, miss_data, impt_mice_data,
  col = "wgt", method = "mice", sp_impt = "sex"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_mice_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_mice_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

MICE:compare by NA counts
# wgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_mice_wgt, aes(x = age, y = wgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per na_count level, split by sex.
ggplot(df_mice_wgt, aes(x = na_count, y = wgt, colour = sex)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_mice_wgt[df_mice_wgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = wgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MICE:compare split with age 14
# age 14 and above
ggplot(
  df_mice_wgt[df_mice_wgt$age >= 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# age below 14
ggplot(
  df_mice_wgt[df_mice_wgt$age < 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

Ranger: Weight
Ranger: compare the imputed datasets with the original dataset
# Comparison frame for wgt under ranger, built with sp_impt = "method".
df_ranger_wgt <- create_compare_data(
  data, miss_data, impt_ranger_data,
  col = "wgt", method = "ranger", sp_impt = "method"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_ranger_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_ranger_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_ranger_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

Ranger:compare split with Sex
# Rebuild the ranger wgt comparison frame with sp_impt = "sex"; this
# overwrites the df_ranger_wgt created for the method comparison.
df_ranger_wgt <- create_compare_data(
  data, miss_data, impt_ranger_data,
  col = "wgt", method = "ranger", sp_impt = "sex"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_ranger_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_ranger_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

Ranger:compare by NA counts
# wgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_ranger_wgt, aes(x = age, y = wgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# wgt distribution per na_count level, split by sex.
ggplot(df_ranger_wgt, aes(x = na_count, y = wgt, colour = sex)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_ranger_wgt[df_ranger_wgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = wgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Ranger:compare split with age 14
# age 14 and above
ggplot(
  df_ranger_wgt[df_ranger_wgt$age >= 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# age below 14
ggplot(
  df_ranger_wgt[df_ranger_wgt$age < 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

MIDAS: Weight
MIDAS: compare the imputed datasets with the original dataset
# Comparison frame for wgt under MIDAS, built with sp_impt = "method".
df_midas_wgt <- create_compare_data(
  data, miss_data, impt_rmidas_data,
  col = "wgt", method = "midas", sp_impt = "method"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_midas_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_midas_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_midas_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

MIDAS:compare split with Sex
# Rebuild the MIDAS wgt comparison frame with sp_impt = "sex"; this
# overwrites the df_midas_wgt created for the method comparison.
df_midas_wgt <- create_compare_data(
  data, miss_data, impt_rmidas_data,
  col = "wgt", method = "midas", sp_impt = "sex"
)

# wgt against age: points plus one smoother per `source` level.
ggplot(df_midas_wgt, aes(x = age, y = wgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per `source` level.
ggplot(df_midas_wgt, aes(x = source, y = wgt, colour = source)) +
  geom_boxplot()

MIDAS:compare by NA counts
# wgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_midas_wgt, aes(x = age, y = wgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# wgt distribution per na_count level, split by sex.
ggplot(df_midas_wgt, aes(x = na_count, y = wgt, colour = sex)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_midas_wgt[df_midas_wgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = wgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MIDAS:compare split with age 14
# age 14 and above
ggplot(
  df_midas_wgt[df_midas_wgt$age >= 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# age below 14
ggplot(
  df_midas_wgt[df_midas_wgt$age < 14, ],
  aes(x = age, y = wgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MICE: Height
MICE: compare the imputed datasets with the original dataset
# Comparison frame for hgt under MICE, built with sp_impt = "method".
df_mice_hgt <- create_compare_data(
  data, miss_data, impt_mice_data,
  col = "hgt", method = "mice", sp_impt = "method"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_mice_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_mice_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_mice_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

MICE:compare split with Sex
# Rebuild the MICE hgt comparison frame with sp_impt = "sex"; this
# overwrites the df_mice_hgt created for the method comparison.
df_mice_hgt <- create_compare_data(
  data, miss_data, impt_mice_data,
  col = "hgt", method = "mice", sp_impt = "sex"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_mice_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_mice_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

MICE:compare by NA counts
# hgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_mice_hgt, aes(x = age, y = hgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per na_count level, split by `source`.
# NOTE(review): the other NA-count boxplots in this file colour by sex;
# confirm that colouring by `source` is intended here.
ggplot(df_mice_hgt, aes(x = na_count, y = hgt, colour = source)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_mice_hgt[df_mice_hgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = hgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MICE:compare split with age 14
# age 14 and above
ggplot(
  df_mice_hgt[df_mice_hgt$age >= 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# age below 14
ggplot(
  df_mice_hgt[df_mice_hgt$age < 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

Ranger: Height
Ranger: compare the imputed datasets with the original dataset
# Comparison frame for hgt under ranger, built with sp_impt = "method".
df_ranger_hgt <- create_compare_data(
  data, miss_data, impt_ranger_data,
  col = "hgt", method = "ranger", sp_impt = "method"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_ranger_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_ranger_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_ranger_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

Ranger:compare split with Sex
# Rebuild the ranger hgt comparison frame with sp_impt = "sex"; this
# overwrites the df_ranger_hgt created for the method comparison.
df_ranger_hgt <- create_compare_data(
  data, miss_data, impt_ranger_data,
  col = "hgt", method = "ranger", sp_impt = "sex"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_ranger_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_ranger_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

Ranger:compare by NA counts
# hgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_ranger_hgt, aes(x = age, y = hgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per na_count level, split by sex.
ggplot(df_ranger_hgt, aes(x = na_count, y = hgt, colour = sex)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_ranger_hgt[df_ranger_hgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = hgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

Ranger:compare split with age 14
# age 14 and above
ggplot(
  df_ranger_hgt[df_ranger_hgt$age >= 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# age below 14
ggplot(
  df_ranger_hgt[df_ranger_hgt$age < 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MIDAS: Height
MIDAS: compare the imputed datasets with the original dataset
# Comparison frame for hgt under MIDAS, built with sp_impt = "method".
df_midas_hgt <- create_compare_data(
  data, miss_data, impt_rmidas_data,
  col = "hgt", method = "midas", sp_impt = "method"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_midas_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_midas_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

# Same boxplot, with the layer's colour aesthetic overridden by sex.
ggplot(df_midas_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot(aes(colour = sex))

MIDAS:compare split with Sex
# Rebuild the MIDAS hgt comparison frame with sp_impt = "sex"; this
# overwrites the df_midas_hgt created for the method comparison.
df_midas_hgt <- create_compare_data(
  data, miss_data, impt_rmidas_data,
  col = "hgt", method = "midas", sp_impt = "sex"
)

# hgt against age: points plus one smoother per `source` level.
ggplot(df_midas_hgt, aes(x = age, y = hgt, colour = source)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per `source` level.
ggplot(df_midas_hgt, aes(x = source, y = hgt, colour = source)) +
  geom_boxplot()

MIDAS:compare by NA counts
# hgt against age, coloured by the na_count column of the comparison frame.
ggplot(df_midas_hgt, aes(x = age, y = hgt, colour = na_count)) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# hgt distribution per na_count level, split by sex.
ggplot(df_midas_hgt, aes(x = na_count, y = hgt, colour = sex)) +
  geom_boxplot()

# Keep only the na_count levels "4" and "True(0 na)".
# %in% never returns NA, so rows whose na_count is NA are dropped instead
# of being returned as all-NA rows (which is what `==` with `|` would do).
ggplot(
  df_midas_hgt[df_midas_hgt$na_count %in% c("4", "True(0 na)"), ],
  aes(x = age, y = hgt, colour = na_count)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

MIDAS:compare split with age 14
# age 14 and above
ggplot(
  df_midas_hgt[df_midas_hgt$age >= 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

# age below 14
ggplot(
  df_midas_hgt[df_midas_hgt$age < 14, ],
  aes(x = age, y = hgt, colour = source)
) +
  geom_point(alpha = 0.4) +
  stat_smooth()

compare miss to true data:wgt
# Compare imputed wgt with the true wgt at the rows where miss_data$wgt is NA,
# one three-panel figure (mice / ranger / midas) per imputed dataset.

# Rows whose wgt is missing in miss_data.
miss_index <- which(is.na(miss_data$wgt))

# Loop-invariant inputs: true values and sex at the missing rows.
sex <- factor(data$sex[miss_index])
true_wgt <- data$wgt[miss_index]

# Scatter of imputed against true wgt, coloured by sex, one smoother per sex.
plot_true_vs_imputed <- function(true_wgt, imputed_wgt, sex, label) {
  plot_df <- data.frame(true = true_wgt, imputed = imputed_wgt, sex = sex)
  ggplot(plot_df, aes(x = true, y = imputed, colour = sex)) +
    geom_point() +
    stat_smooth() +
    ylab(label) +
    xlab("data wgt") +
    theme(legend.position = "top")
}

# Plot at most 10 imputations, and never index past the shortest list.
# The previous loop ignored its index and always plotted element [[3]],
# producing the same figure ten times.
n_impt <- min(
  10L,
  length(impt_mice_data),
  length(impt_ranger_data),
  length(impt_rmidas_data)
)

for (i in seq_len(n_impt)) {
  g1 <- plot_true_vs_imputed(
    true_wgt, impt_mice_data[[i]]$wgt[miss_index], sex, "mice wgt"
  )
  g2 <- plot_true_vs_imputed(
    true_wgt, impt_ranger_data[[i]]$wgt[miss_index], sex, "ranger wgt"
  )
  g3 <- plot_true_vs_imputed(
    true_wgt, impt_rmidas_data[[i]]$wgt[miss_index], sex, "midas wgt"
  )
  # Title each figure so the individual imputations can be told apart.
  grid.arrange(g1, g2, g3, ncol = 3, top = paste("imputation", i))
}
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
